// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "depfile_parser.h"
#include "util.h"

#include <algorithm>

using namespace std;

// Builds a parser, keeping a copy of |options| in options_.
DepfileParser::DepfileParser(DepfileParserOptions options)
    : options_(options) {}

// A note on backslashes in Makefiles, from reading the docs:
// Backslash-newline is the line continuation character.
// Backslash-# escapes a # (otherwise meaningful as a comment start).
// Backslash-% escapes a % (otherwise meaningful as a special).
// Finally, quoting the GNU manual, "Backslashes that are not in danger
// of quoting ‘%’ characters go unmolested."
// How do you end a line with a backslash?  The netbsd Make docs suggest
// reading the result of a shell command echoing a backslash!
//
// Rather than implement all of the above, we follow what GCC/Clang produces:
// Backslashes escape a space or hash sign.
// When a space is preceded by 2N+1 backslashes, it represents N backslashes
// followed by a space.
// When a space is preceded by 2N backslashes, it represents 2N backslashes at
// the end of a filename.
// A hash sign is escaped by a single backslash. All other backslashes remain
// unchanged.
//
// If anyone actually has depfiles that rely on the more complicated
// behavior we can adjust this.
// Parses the Makefile-style dependency text held in |content|.
//
// The buffer owned by |content| is rewritten in place while escapes are
// removed, and every filename that is found is recorded as a
// StringPiece(pointer-into-buffer, length): names seen before a rule's ':'
// are appended to outs_, names seen after it are appended to ins_. Duplicates
// are not added a second time.
// NOTE(review): StringPiece is presumably a non-owning view, in which case
// |content| has to outlive ins_/outs_ -- confirm against its declaration.
//
// Returns true on success. Returns false and assigns a message to |*err| when
//   - a name that was not previously recorded as an input shows up as a
//     dependency of a rule whose target list contained a previously recorded
//     input ("inputs may not also have inputs"), or
//   - at least one filename was found but no target terminated by ':' was
//     ever seen ("expected ':' in depfile").
// |err| is dereferenced without a null check.
bool DepfileParser::Parse(string* content, string* err) {
    // in: current parser input point.
    // end: end of input.
    // parsing_targets: whether we are parsing targets or dependencies.
    char* in = &(*content)[0];
    char* end = in + content->size();
    // have_target: set once any token ending in ':' has been seen.
    bool have_target = false;
    bool parsing_targets = true;
    // poisoned_input: set when the current rule lists, on its target side, a
    // name that is already in ins_; cleared again at the next newline.
    bool poisoned_input = false;
    // is_empty: stays true until the first non-empty filename is produced.
    bool is_empty = true;
    // Each iteration of this loop scans (at most) one filename.
    while (in < end) {
        bool have_newline = false;
        // out: current output point (typically same as in, but can fall behind
        // as we de-escape backslashes).
        char* out = in;
        // filename: start of the current parsed filename.
        char* filename = out;
        // The comment block below is a re2c specification; re2c replaces it
        // with the generated matcher. An action that ends in `continue` keeps
        // accumulating into the current filename, while `break` leaves this
        // inner loop and terminates the filename.
        for (;;) {
            // start: beginning of the current parsed span.
            const char* start = in;
            // Bound to re2c's YYMARKER in the specification below.
            char* yymarker = NULL;
            /*!re2c
            re2c:define:YYCTYPE = "unsigned char";
            re2c:define:YYCURSOR = in;
            re2c:define:YYLIMIT = end;
            re2c:define:YYMARKER = yymarker;

            re2c:yyfill:enable = 0;

            re2c:indent:top = 2;
            re2c:indent:string = "  ";

            nul = "\000";
            newline = '\r'?'\n';

            '\\\\'* '\\ ' {
              // 2N+1 backslashes plus space -> N backslashes plus space.
              int len = (int)(in - start);
              int n = len / 2 - 1;
              if (out < start)
                memset(out, '\\', n);
              out += n;
              *out++ = ' ';
              continue;
            }
            '\\\\'+ ' ' {
              // 2N backslashes plus space -> 2N backslashes, end of filename.
              int len = (int)(in - start);
              if (out < start)
                memset(out, '\\', len - 1);
              out += len - 1;
              break;
            }
            '\\'+ '#' {
              // De-escape hash sign, but preserve other leading backslashes.
              int len = (int)(in - start);
              if (len > 2 && out < start)
                memset(out, '\\', len - 2);
              out += len - 2;
              *out++ = '#';
              continue;
            }
            '\\'+ ':' [\x00\x20\r\n\t] {
              // Backslash followed by : and whitespace.
              // It is therefore normal text and not an escaped colon
              int len = (int)(in - start - 1);
              // Need to shift it over if we're overwriting backslashes.
              if (out < start)
                memmove(out, start, len);
              out += len;
              if (*(in - 1) == '\n')
                have_newline = true;
              break;
            }
            '\\'+ ':' {
              // De-escape colon sign, but preserve other leading backslashes.
              // Regular expression uses lookahead to make sure that no whitespace
              // nor EOF follows. In that case it'd be the : at the end of a target
              int len = (int)(in - start);
              if (len > 2 && out < start)
                memset(out, '\\', len - 2);
              out += len - 2;
              *out++ = ':';
              continue;
            }
            '$$' {
              // De-escape dollar character.
              *out++ = '$';
              continue;
            }
            '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
              // Got a span of plain text.
              int len = (int)(in - start);
              // Need to shift it over if we're overwriting backslashes.
              if (out < start)
                memmove(out, start, len);
              out += len;
              continue;
            }
            nul {
              break;
            }
            '\\' newline {
              // A line continuation ends the current file name.
              break;
            }
            newline {
              // A newline ends the current file name and the current rule.
              have_newline = true;
              break;
            }
            [^] {
              // For any other character (e.g. whitespace), swallow it here,
              // allowing the outer logic to loop around again.
              break;
            }
            */
        }

        // [filename, out) now holds the de-escaped token (possibly empty).
        int len = (int)(out - filename);
        // Classify the token before the colon check below flips
        // parsing_targets, so that a token such as "foo:" still counts as a
        // target rather than as a dependency.
        const bool is_dependency = !parsing_targets;
        if (len > 0 && filename[len - 1] == ':') {
            len--;  // Strip off trailing colon, if any.
            parsing_targets = false;
            have_target = true;
        }

        if (len > 0) {
            is_empty = false;
            StringPiece piece = StringPiece(filename, len);
            // If we've seen this as an input before, skip it.
            std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
            if (pos == ins_.end()) {
                if (is_dependency) {
                    // A not-yet-recorded dependency is rejected when this
                    // rule's target side named an already recorded input.
                    if (poisoned_input) {
                        *err = "inputs may not also have inputs";
                        return false;
                    }
                    // New input.
                    ins_.push_back(piece);
                } else {
                    // Check for a new output.
                    if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
                        outs_.push_back(piece);
                }
            } else if (!is_dependency) {
                // We've passed an input on the left side; reject new inputs.
                poisoned_input = true;
            }
        }

        if (have_newline) {
            // A newline ends a rule so the next filename will be a new target.
            parsing_targets = true;
            poisoned_input = false;
        }
    }
    // Input that yielded at least one filename but never a ':'-terminated
    // target is an error; input that yielded no filenames at all is accepted.
    if (!have_target && !is_empty) {
        *err = "expected ':' in depfile";
        return false;
    }
    return true;
}
